###This file replicates the analysis of employment effects presented in the SI (section 3.2)

#load some packages
library(tidyverse)
library(data.table)
library(feather)
library(sandwich)
library(lmtest)
library(ggplot2)
library(stargazer)
library(plm) 
library(xtable)
library(lfe)

# Load the merged voter / employee panel. Columns with a ".YYYY" suffix
# (BaseSalary.2011, LeaveStatus.2013, ...) hold one value per year.
df <- data.table(read_feather("Data/NY_Voters_and_Employees.feather"))
# NOTE: assigning a data.table does not copy it, so `everyone` and `df` refer to
# the same table and the `:=` updates below are visible through both names.
everyone <- df

#we only want people that earned above the minimum wage
# Lowest 2011-2014 base salary within each (employeekey, PayBasis) group.
# With na.rm = TRUE a group whose salaries are all NA gets Inf (with a warning),
# which is not < 7.25 and therefore survives the filter below.
everyone[, minsalary := min(BaseSalary.2011, BaseSalary.2012, BaseSalary.2013,
                            BaseSalary.2014, na.rm = TRUE),
         by = list(employeekey, PayBasis)]
# Print the dimensions before and after dropping anyone below 7.25.
dim(everyone); everyone <- everyone[!(minsalary < 7.25), ]; dim(everyone)
# Highest 2011-2014 base salary within the same groups.
everyone[, MaxSalary := max(BaseSalary.2011, BaseSalary.2012, BaseSalary.2013,
                            BaseSalary.2014, na.rm = TRUE),
         by = list(employeekey, PayBasis)]


##add the undermin variable
# UnderMin stays NA for anyone with no recorded salary in 2014-2016, is FALSE
# for everyone who has one, and is then switched to TRUE for those whose 2013
# base salary was below 8.00. Rows where the comparison is NA keep FALSE.
everyone[!is.na(BaseSalary.2014) | !is.na(BaseSalary.2015) | !is.na(BaseSalary.2016),
         UnderMin := FALSE] #everyone working in this period
everyone[(!is.na(BaseSalary.2014) | !is.na(BaseSalary.2015) | !is.na(BaseSalary.2016)) &
           BaseSalary.2013 < 8.00,
         UnderMin := TRUE]

library(ggplot2)
library(ggthemes)

# Stack the 2013 and 2017 salary / leave-status columns into long format:
# one row per employee and year, keeping employeekey and UnderMin as ids.
wage_dens_df_long <- data.table::melt(
  everyone,
  id.vars = c("employeekey", 'UnderMin'),
  measure.vars = list(BaseSalary = c("BaseSalary.2013", "BaseSalary.2017"),
                      LeaveStatus = c('LeaveStatus.2013', 'LeaveStatus.2017')))

# melt() labels the stacked measure columns 1 and 2 in `variable`;
# translate those into the year they came from.
wage_dens_df_long$Year <- recode(wage_dens_df_long$variable, '1'='2013', '2'='2017')
# Keep only the employee-years with an ACTIVE leave status.
wage_dens_df_long <- wage_dens_df_long[wage_dens_df_long$LeaveStatus=='ACTIVE',]

# Change in base salary between 2013 and 2017.
everyone$Increase <- everyone$BaseSalary.2017 - everyone$BaseSalary.2013

# Histogram of 2013 base salary (employees ACTIVE in 2013), 7-50 range.
ggplot(everyone[everyone$LeaveStatus.2013 == "ACTIVE",], aes(BaseSalary.2013)) +
  geom_histogram(binwidth=.05) +
  xlim(7, 50) +
  xlab('Hourly Pay in 5¢ Bins') +
  theme_few() +
  ggtitle('Employees Making Under $50 Per Hour')
ggsave("Figures/2013_pay_distribution.pdf", width=8, height=5)
# ggsave() opens and closes its own device, so only close a device if one is
# actually open (e.g. the one created by auto-printing the plot above).
# An unconditional dev.off() errors when no device is open.
if (dev.cur() > 1L) {
  dev.off()
}


library(extrafont)
# NOTE(review): font_import() scans the system fonts and is slow; no plot in
# this section sets a font family, so it may be unnecessary -- confirm before
# removing.
font_import()
fonts()
loadfonts()

# Same histogram zoomed in on salaries up to 12, in 1-cent bins, with a dashed
# marker at 8.
ggplot(everyone[everyone$LeaveStatus.2013 == "ACTIVE" & everyone$BaseSalary.2013<=12,],
       aes(BaseSalary.2013)) +
  geom_histogram(binwidth=.01, alpha=1) +
  xlim(7, 12) +
  xlab('Hourly Pay in 1¢ Bins') +
  theme_few() +
  ggtitle('Employees Making Under $12 Per Hour') +
  geom_vline(xintercept=8, linetype='dashed', alpha=.5, color='gray') +
  annotate('text', x=8.3, y=5000, label='Threshold\n for Treatment', color='gray')
# ggsave() must be its own statement: chaining it onto the plot with `+` tries
# to add its return value to the ggplot object, which fails on current ggplot2.
ggsave("Figures/2013_pay_distribution_zoom.pdf", width=8, height=5, dpi=1000)


# Diff-in-diff on job persistence, 2011 cohort ("Pre-Increase") vs 2013 cohort
# ("Post-Increase"). A cohort is everyone ACTIVE in the base year; `persist`
# is TRUE when the employee is also ACTIVE two years later.
leave11 <- (everyone$LeaveStatus.2011 == 'ACTIVE' & everyone$LeaveStatus.2013 == 'ACTIVE')
in_11 <- everyone$LeaveStatus.2011=='ACTIVE'

leave13 <- (everyone$LeaveStatus.2013 == 'ACTIVE' & everyone$LeaveStatus.2015 == 'ACTIVE')
in_13 <- everyone$LeaveStatus.2013=='ACTIVE'

# Row positions of each cohort; which() drops rows whose status is NA.
cohort11_rows <- which(in_11)
cohort13_rows <- which(in_13)

long13 <- data.frame(
  persist = c(leave11[cohort11_rows], leave13[cohort13_rows]),
  # Flag: base-year salary below 8.75.
  UnderMin = c(
    (everyone$BaseSalary.2011 < 8.75)[cohort11_rows],
    (everyone$BaseSalary.2013 < 8.75)[cohort13_rows]),
  year = c(rep('Pre-Increase', length(cohort11_rows)),
           rep('Post-Increase', length(cohort13_rows))),
  employee_id = c(everyone$employeekey[cohort11_rows],
                  everyone$employeekey[cohort13_rows])
)

# Row filter for the "under $15" model, built in the same row order as long13.
# NOTE(review): both cohorts are filtered on BaseSalary.2013, whereas the
# 2015 model further down filters on BaseSalary.2011 -- confirm which is intended.
under15_13 <- c(
  (everyone$BaseSalary.2013 < 15)[cohort11_rows],
  (everyone$BaseSalary.2013 < 15)[cohort13_rows])

long13$year <- factor(long13$year, levels=c('Pre-Increase', "Post-Increase"))

# Drop incomplete rows, and drop the same rows from the filter so that the two
# stay aligned. Previously the filter kept its pre-na.omit length and was
# applied to the shortened data frame, selecting the wrong rows.
complete13 <- complete.cases(long13)
long13 <- na.omit(long13)
under15_13 <- under15_13[complete13]
stopifnot(length(under15_13) == nrow(long13))

# Standard errors clustered by employee. which() keeps only rows where the
# filter is TRUE (an NA salary comparison excludes the row).
mod13Under15 <- felm(persist~UnderMin*year|0|0|employee_id,
                     long13[which(under15_13), ])


mod13 <- felm(persist~UnderMin*year|0|0|employee_id,
              long13)


# Diff-in-diff on job persistence, 2011 cohort ("Pre-Increase") vs 2015 cohort
# ("Post-Increase"). Reuses leave11 / in_11 defined earlier in the file.
# Persistence for the 2015 cohort is ACTIVE in 2015 and still ACTIVE in 2017,
# mirroring the 2011->2013 and 2013->2015 definitions. This was previously a
# copy of the 2013 definition (ACTIVE in 2013 and 2015).
leave15 <- (everyone$LeaveStatus.2015 == 'ACTIVE' & everyone$LeaveStatus.2017 == 'ACTIVE')
in_15 <- everyone$LeaveStatus.2015=='ACTIVE'

# Row positions of each cohort; which() drops rows whose status is NA.
pre_rows_15 <- which(in_11)
post_rows_15 <- which(in_15)

long15 <- data.frame(
  persist = c(leave11[pre_rows_15], leave15[post_rows_15]),
  # Flag: base-year salary below 9.7.
  UnderMin = c(
    (everyone$BaseSalary.2011 < 9.7)[pre_rows_15],
    (everyone$BaseSalary.2015 < 9.7)[post_rows_15]),
  year = c(rep('Pre-Increase', length(pre_rows_15)),
           rep('Post-Increase', length(post_rows_15))),
  employee_id = c(everyone$employeekey[pre_rows_15],
                  everyone$employeekey[post_rows_15])
)

# Row filter for the "under $15" model, built in the same row order as long15.
# NOTE(review): both cohorts are filtered on BaseSalary.2011, whereas the
# 2013 model filters on BaseSalary.2013 -- confirm which is intended.
under15_15 <- c(
  (everyone$BaseSalary.2011 < 15)[pre_rows_15],
  (everyone$BaseSalary.2011 < 15)[post_rows_15])

# Drop incomplete rows, and drop the same rows from the filter so that the two
# stay aligned. Previously the filter kept its pre-na.omit length and was
# applied to the shortened data frame, selecting the wrong rows.
complete15 <- complete.cases(long15)
long15 <- na.omit(long15)
under15_15 <- under15_15[complete15]
stopifnot(length(under15_15) == nrow(long15))
long15$year <- factor(long15$year, levels=c('Pre-Increase', "Post-Increase"))


# Standard errors clustered by employee. which() keeps only rows where the
# filter is TRUE (an NA salary comparison excludes the row).
mod15Under15 <- felm(persist~UnderMin*year|0|0|employee_id,
                     long15[which(under15_15), ])

mod15 <- felm(persist~UnderMin*year|0|0|employee_id, long15)

# Export the table for the 2013 models only (mod15 / mod15Under15 are not
# included).
# NOTE(review): the custom note labels a single star as p<0.05, but no
# star.cutoffs argument is passed, so stargazer's default cutoffs apply --
# confirm the footnote matches the stars actually printed.
stargazer(mod13,mod13Under15,
          covariate.labels=c("Affected By MW Increase", 'Post Increase','Affected By Increase * Post Increase'),
          column.labels='Stayed at Job',
          column.separate = c(2,2,2),
          add.lines = 
            list(c("Included Employees", "All Hourly", "Hourly Under \\$15")),
          notes="$^{*}$p$<$0.05", notes.append=FALSE, omit.stat=c("f", 'ser'),
          title="Employment effects Diff-in-Diff",
          out="Tables/employment_effects", label="employment_effects", dep.var.labels.include=FALSE) 